---
title: 'Bureaucratic Autonomy Government Quality: Multilevel & GEE Analysis'
author: "Bersch"
date: "07/07/2020"
output:
  html_document: default
  pdf_document: default
  word_document: default
---
```{r echo=FALSE, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r, include=FALSE}
options(tinytex.verbose = TRUE)
```

################################################################################################
#### This analyses draws on an alternative specification of variables that excludes admin data
################################################################################################

# Load Libraries
```{r  Load Libraries, include=FALSE}
library(dplyr)
library(ggplot2)
library(interplot)
library(grid)
library(gridExtra)
library(stargazer)
library(broom)
library(geepack)
library(lme4)
library(lmerTest)
library(Hmisc)
library(useful)
library(BBmisc)
#library(tab)
library(magrittr)
library(MESS)
library(sjPlot)
library(ggeffects)
library(effects)
library(psych)
library(apaTables)
```

# Final multilevel models:step-by-step
# Let's use complete case only 
```{r Complete cases only, echo=FALSE}
reg.data1 <- read.csv("SM_CalibratingAutonomy.csv")
reg.data1<- na.omit(reg.data1)# We omit cases with missing values on the important variables 
nrow(reg.data1[!complete.cases(reg.data1), ])/nrow(reg.data1)*100 # Now we see that there are no missing data 
nrow(reg.data1) # Now we have 2597 observations
```
# Unconditional model: A model with no predictors
# Substantive question this model addresses: How much of the variation in perceived goverment quality is there within-and between- agencies?
```{r Final uncoditional model, echo=FALSE}
katherine.unconditional.model <- lmer(Quality~1 + (1 | AGID),data = reg.data1) # This is an unconditional model that helps to evalaute if there is a significant proportion of varaince at agency level. If there is no significant varaince that can be attributed to cluster (i.e., agency), one simply can use a standard single-level multiple regression. In this model, we used the orginal data with missing values. In the subsequent analyses, we used compelete cases.  
summary(katherine.unconditional.model)
```
# ICC: Percentage of variance at agency level
```{r Percentage of Variance at agency level, echo=FALSE}
Calculate_ICC <- function(my.lmer.object){
  variance <- my.lmer.object %>% VarCorr %>% as.data.frame
  ICC <- variance$vcov[1]/(variance$vcov[1]+variance$vcov[2])
  return(ICC) 
}                       # Function to calculate ICC. ICC can also be calculated by hand[varaince at upper level divided by the sum of varaince at upper and lower levels]
Variance.at.agency.level<-Calculate_ICC(katherine.unconditional.model) %>%    # 
  round(2)
Variance.at.agency.level<- Variance.at.agency.level*100
Variance.at.agency.level
```
# Grand mean centering of predictors.
```{r Grand mean cenetering of the predictors, echo=FALSE}
# Grand mean centering enables interpretation of intercept. For instance, the intercept for a conditional model in which discretion is used is interpreted as the predicted value of quality when discretion is zero (i.e.,mean). In the natural metric zero does not have substantive meaning)
reg.data1<-reg.data1 %>% mutate(IndependenceCtrd = Independence - mean(Independence))
reg.data1<-reg.data1 %>% mutate(DiscretionCtrd = Discretion - mean(Discretion)) 
reg.data1<-reg.data1 %>% mutate(CapacityCtrd = Capacity- mean(Capacity)) 
reg.data1<-reg.data1 %>% mutate(YearsCtrd = Years - mean(Years)) 
```

# H1: The relationship between autonomy and quality follows an inverted U as Fukuyama hypothesized. This is particularly true for discretion BUT NOT for independence
```{r katherine.conditional.model1, echo=FALSE}
katherine.conditional.model1 <- lmer(Quality~ IndependenceCtrd+I(IndependenceCtrd^2)+ DiscretionCtrd +I(DiscretionCtrd^2) + (1| AGID),data = reg.data1) #Conditional model1: addressing the first research hypothesis without control varaibles 
summary(katherine.conditional.model1)
```

# Conditional model2: addressing the first research hypothesis after taking the covariates into account 
```{r katherine.conditional.model2, echo=FALSE}
katherine.conditional.model2 <- lmer(Quality~ IndependenceCtrd+ DiscretionCtrd +I(DiscretionCtrd^2) + Appointee+ Gender + YearsCtrd + Education + Fed.District +Party + (1 | AGID),data = reg.data1) # "IndependenceCtrd^2" term was removed becuase was not significant
summary(katherine.conditional.model2)
```

# Table for unconditional model, conditional models 1 and 2 
```{r lmer table 1, echo=FALSE}
# Important notes regarding the table. At the bottom you will see σ squared: variance at  respondent level, τ00: variance at agency level, then you have ICC; N is the number of agencies, observataions: respondents;Marginal and conditional R squared. 
# As you can see the addition of independence and discretion ( linear and quadratic) explained 14.7% of within-clusters variance(i.e., .68-.58/.68).The second conditional model explained just about 3% of the variance 
tab_model(katherine.unconditional.model, katherine.conditional.model1, katherine.conditional.model2, dv.labels = c("lmer.Model0", "lmer.Model1", "lmer.Model2"), file = "Table1.MLM.Models0_1en2.doc")
```
# H2: The relationship between autonomy (i.e., independence and discretion) and government quality changes based on capacity..
# Indepenence by capacity interaction 
```{r katherine.Interaction.model1, echo=FALSE}
katherine.Interaction.model1 <- lmer(Quality ~IndependenceCtrd*CapacityCtrd + I(IndependenceCtrd^2)*CapacityCtrd+ DiscretionCtrd*CapacityCtrd + CapacityCtrd*I(DiscretionCtrd^2) +(1 | AGID),data = reg.data1) 
summary(katherine.Interaction.model1)
```
# Independence by capacity interaction after taking the covariates into account 
```{r katherine.Interaction.model2, echo=FALSE}
katherine.Interaction.model2 <- lmer(Quality ~IndependenceCtrd*CapacityCtrd  + DiscretionCtrd*CapacityCtrd +CapacityCtrd*I(DiscretionCtrd^2) + Appointee+ Gender + YearsCtrd + Education + Fed.District +Party + (1 | AGID),data = reg.data1)
# "IndependenceCtrd^2" term was removed becuase was not significant
summary(katherine.Interaction.model2)
```

# Model requested: use aggregate capacity  and test interaction effect
```{r additional model, echo= FALSE}
# This model addresses your comments on 
reg.data1<-reg.data1 %>%  group_by(AGID) %>% mutate(CapacityAgency = mean(Capacity))

katherine.Interaction.model3 <- lmer(Quality ~IndependenceCtrd*CapacityAgency+ I(IndependenceCtrd^2)*CapacityAgency  + DiscretionCtrd*CapacityAgency +CapacityAgency*I(DiscretionCtrd^2) + (1 | AGID),data = reg.data1)

tab_model(katherine.Interaction.model3)
#reg.data1$CapacityAgency <- aggregate(reg.data1$Capacity, by = list(reg.data1$AGID), mean)
```

# Table for interactions based on multilevel models
```{r lmer table 2, echo=FALSE}
tab_model(katherine.Interaction.model1, katherine.Interaction.model2,dv.labels = c("lmer.InteractionM1", "lmer.InteractionM2"), file = "Table2.MLM.InteractionModels1en2.doc")
```
# How does GEE compare
# GEE model1: addressing the first research hypothesis without control varaibles 
```{r  katherine.GEE.model1, echo=FALSE}
reg.data1 <- reg.data1[order(reg.data1$AGID),] # We need to order by ID
reg.data1a<-reg.data1 # Rename data and standardize independence, capacity and discretion 
reg.data1a$Independence <- c(scale(reg.data1a$Independence))
reg.data1a$Discretion <- c(scale(reg.data1a$Discretion))
reg.data1a$Capacity<- c(scale(reg.data1a$Capacity))
reg.data1a$Education <- as.factor(reg.data1a$Education)

GEE.model1<-formula(Quality~Independence+ I(Independence^2) + Discretion +I(Discretion^2))
katherine.GEE.model1 <- geeglm(GEE.model1,id=AGID, corstr = "exchangeable", family = "gaussian", data=reg.data1a)# The correlation structure is assumed to be exchangable 
summary(katherine.GEE.model1)
```
# GEE model2: addressing the first research hypothesis by including control varaibles
```{r katherine.GEE.model2, echo=FALSE}
GEE.model2<-formula(Quality~Independence + Discretion +I(Discretion^2)+ Appointee+ Gender + Years + Education + Fed.District +Party) # # Independence^2" term was removed because was not significant
katherine.GEE.model2 <- geeglm(GEE.model2,id=AGID, corstr = "exchangeable", family = "gaussian", data=reg.data1a)
summary(katherine.GEE.model2)
```
# GEE resulsts tables: models 1 and 2 ( Main effects)
```{r GEE table 1, echo=FALSE}
tab_model(katherine.GEE.model1, katherine.GEE.model2, dv.labels= c("GEE.Model1","GEE.Model2"), file = "Table2.GEE.Models1en2.doc")
```
# H2:GEE
# Autonomy by capacity interaction 
```{r  katherine.GEE.model3, echo=FALSE}
GEE.model3<-formula(Quality~Independence*Capacity  +I(Independence^2)*Capacity+ Discretion*Capacity + Capacity*I(Discretion^2))
katherine.GEE.model3 <- geeglm(GEE.model3,id=AGID, corstr = "exchangeable", family = "gaussian", data=reg.data1a)
summary(katherine.GEE.model3)
```
# Autonomy by capacity interaction after taking the covariates into account 
```{r  katherine.GEE.model4, echo=FALSE}
GEE.model4<-formula(Quality~Independence*Capacity + Discretion*Capacity + Capacity*I(Discretion^2)+ Appointee+ Gender + Years + Education + Fed.District +Party ) #Independence^2" term was removed because was not significant in the previous model
katherine.GEE.model4 <- geeglm(GEE.model4,id=AGID, corstr = "exchangeable", family = "gaussian", data=reg.data1a)
summary(katherine.GEE.model4)
```
# Gee resulsts tables: models 3 and 4(Interaction effects)
```{r GEE table 2, echo=FALSE }
tab_model(katherine.GEE.model3, katherine.GEE.model4, dv.labels= c("GEE.Model3","GEE.Model4"), file = "Table2.GEE.InteractionModels3en4.doc")
# These two models mimic the lmer models tested above 
```
# Compare GEE models 
```{r Compare GEE models, echo=FALSE}
#Quasi-likelihood Information Criterion (QIC) is analogus to AIC. When comparing two GEE models, a model with lower QIC is preferred. As you can see here, perhaps removing some non-significant control variables is helpful (katherine.GEE.model1 is better than katherine.GEE.model2, same works for the interaction models(3 & 4)). 
mod1<-as.data.frame(QIC(katherine.GEE.model1))
mod2<-as.data.frame(QIC(katherine.GEE.model2))
mod3<-as.data.frame(QIC(katherine.GEE.model3)) 
mod4<-as.data.frame(QIC(katherine.GEE.model4))
QIC.Values<- cbind(mod1, mod2, mod3, mod4)
QIC.Values

```
